{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xgboost as xgb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(33465, 5460)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Features are loaded from a joblib artifact; labels are the 'label' column of the CSV.\n",
    "data = joblib.load('../../scrum_data/train_data/train_null_sign.lz4')\n",
    "data_label = pd.read_csv(\n",
    "    '../../preprocess_data/train_y_33465.csv',\n",
    "    usecols=['label'],\n",
    ")\n",
    "\n",
    "# Missing feature values are replaced with the sentinel -1 before conversion to arrays.\n",
    "x = data.fillna(-1).values\n",
    "y = data_label.values\n",
    "\n",
    "x.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### All samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training matrix built from every row of `x` / `y`.\n",
    "# `feature_names` is passed as a plain list of column names (the form XGBoost documents,\n",
    "# and the same form that is saved with joblib further down) rather than a pandas Index.\n",
    "dtrain = xgb.DMatrix(x, y, feature_names=list(data.columns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\ttrain-auc:0.652595\n",
      "[10]\ttrain-auc:0.695954\n",
      "[20]\ttrain-auc:0.714873\n",
      "[30]\ttrain-auc:0.730022\n",
      "[40]\ttrain-auc:0.740691\n",
      "[50]\ttrain-auc:0.746111\n",
      "[60]\ttrain-auc:0.753499\n",
      "[70]\ttrain-auc:0.759688\n",
      "[80]\ttrain-auc:0.764973\n",
      "[90]\ttrain-auc:0.769682\n",
      "[100]\ttrain-auc:0.773679\n",
      "[110]\ttrain-auc:0.77747\n",
      "[120]\ttrain-auc:0.78166\n",
      "[130]\ttrain-auc:0.785796\n",
      "[140]\ttrain-auc:0.789334\n",
      "[150]\ttrain-auc:0.792995\n",
      "[160]\ttrain-auc:0.79581\n",
      "[170]\ttrain-auc:0.798973\n",
      "[180]\ttrain-auc:0.802515\n",
      "[190]\ttrain-auc:0.805727\n",
      "[199]\ttrain-auc:0.808637\n"
     ]
    }
   ],
   "source": [
    "# Booster parameters for the model trained on `dtrain`.\n",
    "#\n",
    "# NOTE: 'early_stopping_rounds' is deliberately NOT part of `params`. It is a keyword argument\n",
    "# of xgb.train(), not a booster parameter, so as a dict entry it never triggered early stopping.\n",
    "# It is not passed to xgb.train() either: the only evaluation set below is the training data\n",
    "# itself, which is not a meaningful stopping signal.\n",
    "params = {\n",
    "    'booster': 'gbtree',\n",
    "    'objective': 'binary:logistic',\n",
    "    # negative samples divided by positive samples\n",
    "    'scale_pos_weight': float(len(y) - np.sum(y)) / float(np.sum(y)),\n",
    "    'eval_metric': 'auc',\n",
    "    'gamma': 0.1,\n",
    "    'max_depth': 4,\n",
    "    'lambda': 10,\n",
    "    'subsample': 0.9,\n",
    "    'colsample_bytree': 0.9,\n",
    "    'eta': 0.04,\n",
    "    'seed': 2018,\n",
    "    'nthread': 18,\n",
    "}\n",
    "\n",
    "# Only the training set is monitored, so the printed AUC is a training metric.\n",
    "watchlist = [(dtrain, 'train')]\n",
    "model_33465 = xgb.train(\n",
    "    params,\n",
    "    dtrain,\n",
    "    num_boost_round=200,\n",
    "    evals=watchlist,\n",
    "    verbose_eval=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['feature_names']"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persist the trained booster, then the ordered list of feature column names.\n",
    "joblib.dump(value=model_33465, filename='model_33465')\n",
    "joblib.dump(value=list(data.columns), filename='feature_names')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'null_sign_f409': 34,\n",
       " 'null_sign_f1157': 11,\n",
       " 'null_sign_f16': 80,\n",
       " 'null_sign_f3660': 8,\n",
       " 'null_sign_f5027': 58,\n",
       " 'null_sign_f231': 38,\n",
       " 'null_sign_f5930': 47,\n",
       " 'null_sign_f1066': 17,\n",
       " 'null_sign_f412': 67,\n",
       " 'null_sign_f415': 43,\n",
       " 'null_sign_f4930': 11,\n",
       " 'null_sign_f3227': 3,\n",
       " 'null_sign_f8': 25,\n",
       " 'null_sign_f891': 42,\n",
       " 'null_sign_f477': 49,\n",
       " 'null_sign_f643': 23,\n",
       " 'null_sign_f1520': 8,\n",
       " 'null_sign_f4': 11,\n",
       " 'null_sign_f829': 17,\n",
       " 'null_sign_f1726': 11,\n",
       " 'null_sign_f764': 6,\n",
       " 'null_sign_f6714': 7,\n",
       " 'null_sign_f4386': 11,\n",
       " 'null_sign_f92': 36,\n",
       " 'null_sign_f1127': 3,\n",
       " 'null_sign_f304': 9,\n",
       " 'null_sign_f1250': 5,\n",
       " 'null_sign_f27': 8,\n",
       " 'null_sign_f6390': 2,\n",
       " 'null_sign_f526': 15,\n",
       " 'null_sign_f190': 17,\n",
       " 'null_sign_f22': 2,\n",
       " 'null_sign_f986': 16,\n",
       " 'null_sign_f2335': 6,\n",
       " 'null_sign_f6034': 8,\n",
       " 'null_sign_f2815': 31,\n",
       " 'null_sign_f6340': 6,\n",
       " 'null_sign_f210': 13,\n",
       " 'null_sign_f1025': 12,\n",
       " 'null_sign_f543': 9,\n",
       " 'null_sign_f5814': 2,\n",
       " 'null_sign_f4144': 12,\n",
       " 'null_sign_f36': 38,\n",
       " 'null_sign_f881': 16,\n",
       " 'null_sign_f222': 8,\n",
       " 'null_sign_f2892': 4,\n",
       " 'null_sign_f160': 3,\n",
       " 'null_sign_f830': 3,\n",
       " 'null_sign_f1712': 2,\n",
       " 'null_sign_f3747': 2,\n",
       " 'null_sign_f464': 18,\n",
       " 'null_sign_f2226': 22,\n",
       " 'null_sign_f5120': 8,\n",
       " 'null_sign_f4856': 14,\n",
       " 'null_sign_f3296': 32,\n",
       " 'null_sign_f1113': 26,\n",
       " 'null_sign_f1633': 10,\n",
       " 'null_sign_f636': 11,\n",
       " 'null_sign_f2502': 1,\n",
       " 'null_sign_f2791': 10,\n",
       " 'null_sign_f254': 28,\n",
       " 'null_sign_f116': 11,\n",
       " 'null_sign_f2142': 5,\n",
       " 'null_sign_f2908': 12,\n",
       " 'null_sign_f3700': 29,\n",
       " 'null_sign_f1535': 15,\n",
       " 'null_sign_f3527': 4,\n",
       " 'null_sign_f483': 5,\n",
       " 'null_sign_f4933': 15,\n",
       " 'null_sign_f1771': 12,\n",
       " 'null_sign_f669': 2,\n",
       " 'null_sign_f5572': 28,\n",
       " 'null_sign_f691': 25,\n",
       " 'null_sign_f4287': 2,\n",
       " 'null_sign_f15': 10,\n",
       " 'null_sign_f1907': 17,\n",
       " 'null_sign_f17': 5,\n",
       " 'null_sign_f411': 3,\n",
       " 'null_sign_f385': 46,\n",
       " 'null_sign_f3906': 45,\n",
       " 'null_sign_f176': 31,\n",
       " 'null_sign_f3397': 6,\n",
       " 'null_sign_f3289': 5,\n",
       " 'null_sign_f2137': 1,\n",
       " 'null_sign_f53': 2,\n",
       " 'null_sign_f1092': 3,\n",
       " 'null_sign_f6350': 3,\n",
       " 'null_sign_f2607': 4,\n",
       " 'null_sign_f43': 7,\n",
       " 'null_sign_f2037': 1,\n",
       " 'null_sign_f170': 2,\n",
       " 'null_sign_f880': 1,\n",
       " 'null_sign_f667': 5,\n",
       " 'null_sign_f777': 10,\n",
       " 'null_sign_f1970': 17,\n",
       " 'null_sign_f2220': 2,\n",
       " 'null_sign_f681': 16,\n",
       " 'null_sign_f349': 26,\n",
       " 'null_sign_f775': 6,\n",
       " 'null_sign_f670': 18,\n",
       " 'null_sign_f2342': 5,\n",
       " 'null_sign_f217': 1,\n",
       " 'null_sign_f357': 2,\n",
       " 'null_sign_f127': 6,\n",
       " 'null_sign_f3469': 5,\n",
       " 'null_sign_f804': 3,\n",
       " 'null_sign_f3361': 7,\n",
       " 'null_sign_f267': 10,\n",
       " 'null_sign_f191': 6,\n",
       " 'null_sign_f4260': 7,\n",
       " 'null_sign_f60': 28,\n",
       " 'null_sign_f1811': 7,\n",
       " 'null_sign_f585': 4,\n",
       " 'null_sign_f5825': 7,\n",
       " 'null_sign_f1349': 1,\n",
       " 'null_sign_f553': 21,\n",
       " 'null_sign_f5763': 3,\n",
       " 'null_sign_f1687': 6,\n",
       " 'null_sign_f2213': 1,\n",
       " 'null_sign_f4438': 1,\n",
       " 'null_sign_f3524': 18,\n",
       " 'null_sign_f5616': 3,\n",
       " 'null_sign_f978': 9,\n",
       " 'null_sign_f3030': 3,\n",
       " 'null_sign_f592': 4,\n",
       " 'null_sign_f416': 26,\n",
       " 'null_sign_f972': 3,\n",
       " 'null_sign_f2005': 8,\n",
       " 'null_sign_f876': 8,\n",
       " 'null_sign_f555': 4,\n",
       " 'null_sign_f2090': 1,\n",
       " 'null_sign_f3328': 3,\n",
       " 'null_sign_f6088': 1,\n",
       " 'null_sign_f5588': 18,\n",
       " 'null_sign_f2896': 7,\n",
       " 'null_sign_f1111': 22,\n",
       " 'null_sign_f353': 10,\n",
       " 'null_sign_f3178': 10,\n",
       " 'null_sign_f250': 8,\n",
       " 'null_sign_f2853': 6,\n",
       " 'null_sign_f2702': 2,\n",
       " 'null_sign_f629': 19,\n",
       " 'null_sign_f649': 6,\n",
       " 'null_sign_f5508': 10,\n",
       " 'null_sign_f3436': 12,\n",
       " 'null_sign_f4095': 10,\n",
       " 'null_sign_f941': 9,\n",
       " 'null_sign_f1972': 3,\n",
       " 'null_sign_f1993': 8,\n",
       " 'null_sign_f2806': 13,\n",
       " 'null_sign_f564': 11,\n",
       " 'null_sign_f3909': 21,\n",
       " 'null_sign_f2448': 3,\n",
       " 'null_sign_f1932': 20,\n",
       " 'null_sign_f5732': 3,\n",
       " 'null_sign_f120': 1,\n",
       " 'null_sign_f840': 6,\n",
       " 'null_sign_f468': 15,\n",
       " 'null_sign_f5830': 31,\n",
       " 'null_sign_f21': 17,\n",
       " 'null_sign_f818': 5,\n",
       " 'null_sign_f1042': 4,\n",
       " 'null_sign_f3680': 3,\n",
       " 'null_sign_f124': 11,\n",
       " 'null_sign_f3991': 1,\n",
       " 'null_sign_f3967': 3,\n",
       " 'null_sign_f5413': 5,\n",
       " 'null_sign_f1041': 5,\n",
       " 'null_sign_f1558': 2,\n",
       " 'null_sign_f2215': 7,\n",
       " 'null_sign_f5257': 1,\n",
       " 'null_sign_f3765': 10,\n",
       " 'null_sign_f3817': 10,\n",
       " 'null_sign_f4344': 1,\n",
       " 'null_sign_f800': 16,\n",
       " 'null_sign_f318': 4,\n",
       " 'null_sign_f580': 2,\n",
       " 'null_sign_f5333': 1,\n",
       " 'null_sign_f1358': 1,\n",
       " 'null_sign_f3713': 5,\n",
       " 'null_sign_f6096': 4,\n",
       " 'null_sign_f268': 4,\n",
       " 'null_sign_f402': 4,\n",
       " 'null_sign_f3904': 1,\n",
       " 'null_sign_f195': 4,\n",
       " 'null_sign_f1363': 7,\n",
       " 'null_sign_f3615': 4,\n",
       " 'null_sign_f4244': 17,\n",
       " 'null_sign_f3002': 19,\n",
       " 'null_sign_f1332': 7,\n",
       " 'null_sign_f1143': 8,\n",
       " 'null_sign_f538': 7,\n",
       " 'null_sign_f2383': 1,\n",
       " 'null_sign_f3394': 5,\n",
       " 'null_sign_f494': 4,\n",
       " 'null_sign_f1870': 6,\n",
       " 'null_sign_f6690': 14,\n",
       " 'null_sign_f5922': 3,\n",
       " 'null_sign_f539': 8,\n",
       " 'null_sign_f675': 9,\n",
       " 'null_sign_f337': 2,\n",
       " 'null_sign_f5401': 11,\n",
       " 'null_sign_f547': 12,\n",
       " 'null_sign_f5692': 15,\n",
       " 'null_sign_f2295': 2,\n",
       " 'null_sign_f1343': 3,\n",
       " 'null_sign_f173': 7,\n",
       " 'null_sign_f3849': 2,\n",
       " 'null_sign_f1958': 3,\n",
       " 'null_sign_f1247': 7,\n",
       " 'null_sign_f3654': 7,\n",
       " 'null_sign_f4907': 2,\n",
       " 'null_sign_f856': 1,\n",
       " 'null_sign_f4467': 5,\n",
       " 'null_sign_f1981': 8,\n",
       " 'null_sign_f2094': 1,\n",
       " 'null_sign_f4580': 11,\n",
       " 'null_sign_f878': 20,\n",
       " 'null_sign_f1847': 4,\n",
       " 'null_sign_f684': 8,\n",
       " 'null_sign_f1115': 2,\n",
       " 'null_sign_f4872': 6,\n",
       " 'null_sign_f530': 1,\n",
       " 'null_sign_f206': 5,\n",
       " 'null_sign_f68': 2,\n",
       " 'null_sign_f5817': 8,\n",
       " 'null_sign_f6056': 23,\n",
       " 'null_sign_f1100': 3,\n",
       " 'null_sign_f5901': 1,\n",
       " 'null_sign_f1666': 3,\n",
       " 'null_sign_f2009': 3,\n",
       " 'null_sign_f548': 1,\n",
       " 'null_sign_f367': 7,\n",
       " 'null_sign_f2318': 2,\n",
       " 'null_sign_f326': 2,\n",
       " 'null_sign_f66': 1,\n",
       " 'null_sign_f6471': 2,\n",
       " 'null_sign_f1677': 1,\n",
       " 'null_sign_f73': 15,\n",
       " 'null_sign_f359': 7,\n",
       " 'null_sign_f536': 6,\n",
       " 'null_sign_f110': 5,\n",
       " 'null_sign_f645': 5,\n",
       " 'null_sign_f2': 9,\n",
       " 'null_sign_f955': 4,\n",
       " 'null_sign_f843': 6,\n",
       " 'null_sign_f524': 1,\n",
       " 'null_sign_f2036': 6,\n",
       " 'null_sign_f401': 1,\n",
       " 'null_sign_f52': 14,\n",
       " 'null_sign_f2978': 2,\n",
       " 'null_sign_f1768': 1,\n",
       " 'null_sign_f351': 5,\n",
       " 'null_sign_f1910': 8,\n",
       " 'null_sign_f1395': 5,\n",
       " 'null_sign_f2188': 1,\n",
       " 'null_sign_f143': 1,\n",
       " 'null_sign_f1544': 2,\n",
       " 'null_sign_f3749': 9,\n",
       " 'null_sign_f6454': 1,\n",
       " 'null_sign_f2445': 1,\n",
       " 'null_sign_f4697': 8,\n",
       " 'null_sign_f688': 1,\n",
       " 'null_sign_f355': 2,\n",
       " 'null_sign_f4748': 5,\n",
       " 'null_sign_f1144': 3,\n",
       " 'null_sign_f2348': 4,\n",
       " 'null_sign_f2561': 2,\n",
       " 'null_sign_f212': 2,\n",
       " 'null_sign_f1891': 4,\n",
       " 'null_sign_f2158': 1,\n",
       " 'null_sign_f5248': 2,\n",
       " 'null_sign_f5052': 3,\n",
       " 'null_sign_f897': 4,\n",
       " 'null_sign_f2918': 1,\n",
       " 'null_sign_f5484': 5,\n",
       " 'null_sign_f4143': 2,\n",
       " 'null_sign_f6122': 1,\n",
       " 'null_sign_f1149': 1,\n",
       " 'null_sign_f6545': 2,\n",
       " 'null_sign_f844': 4,\n",
       " 'null_sign_f535': 5,\n",
       " 'null_sign_f2371': 2,\n",
       " 'null_sign_f2159': 9,\n",
       " 'null_sign_f2914': 1,\n",
       " 'null_sign_f2746': 1,\n",
       " 'null_sign_f1552': 6,\n",
       " 'null_sign_f992': 5,\n",
       " 'null_sign_f4648': 2,\n",
       " 'null_sign_f295': 2,\n",
       " 'null_sign_f6020': 1,\n",
       " 'null_sign_f572': 1,\n",
       " 'null_sign_f1314': 5,\n",
       " 'null_sign_f4501': 1,\n",
       " 'null_sign_f5973': 4,\n",
       " 'null_sign_f1859': 7,\n",
       " 'null_sign_f63': 2,\n",
       " 'null_sign_f1792': 1,\n",
       " 'null_sign_f1191': 1,\n",
       " 'null_sign_f921': 1,\n",
       " 'null_sign_f6425': 2,\n",
       " 'null_sign_f3860': 5,\n",
       " 'null_sign_f877': 1,\n",
       " 'null_sign_f3018': 2,\n",
       " 'null_sign_f3028': 2,\n",
       " 'null_sign_f5983': 5,\n",
       " 'null_sign_f3150': 1,\n",
       " 'null_sign_f4551': 1,\n",
       " 'null_sign_f2681': 1,\n",
       " 'null_sign_f25': 1,\n",
       " 'null_sign_f14': 2,\n",
       " 'null_sign_f454': 12,\n",
       " 'null_sign_f4417': 3,\n",
       " 'null_sign_f5907': 1,\n",
       " 'null_sign_f5204': 3,\n",
       " 'null_sign_f5134': 1,\n",
       " 'null_sign_f2425': 2,\n",
       " 'null_sign_f6391': 1,\n",
       " 'null_sign_f424': 5,\n",
       " 'null_sign_f1551': 3,\n",
       " 'null_sign_f396': 4,\n",
       " 'null_sign_f49': 1,\n",
       " 'null_sign_f1862': 1,\n",
       " 'null_sign_f2004': 5,\n",
       " 'null_sign_f6075': 2,\n",
       " 'null_sign_f213': 6,\n",
       " 'null_sign_f721': 4,\n",
       " 'null_sign_f3779': 1,\n",
       " 'null_sign_f3486': 1,\n",
       " 'null_sign_f674': 1,\n",
       " 'null_sign_f2581': 1,\n",
       " 'null_sign_f1108': 1,\n",
       " 'null_sign_f192': 2,\n",
       " 'null_sign_f3973': 2,\n",
       " 'null_sign_f4161': 2,\n",
       " 'null_sign_f2225': 1,\n",
       " 'null_sign_f507': 3,\n",
       " 'null_sign_f6579': 1,\n",
       " 'null_sign_f990': 1,\n",
       " 'null_sign_f5094': 1,\n",
       " 'null_sign_f3142': 3,\n",
       " 'null_sign_f1000': 1,\n",
       " 'null_sign_f4464': 1,\n",
       " 'null_sign_f188': 2,\n",
       " 'null_sign_f730': 1,\n",
       " 'null_sign_f892': 2,\n",
       " 'null_sign_f2298': 1,\n",
       " 'null_sign_f1491': 1,\n",
       " 'null_sign_f5811': 2,\n",
       " 'null_sign_f1125': 3,\n",
       " 'null_sign_f177': 2,\n",
       " 'null_sign_f5950': 1,\n",
       " 'null_sign_f792': 1,\n",
       " 'null_sign_f1537': 1,\n",
       " 'null_sign_f4164': 1,\n",
       " 'null_sign_f680': 2,\n",
       " 'null_sign_f366': 1,\n",
       " 'null_sign_f5335': 2,\n",
       " 'null_sign_f4486': 1,\n",
       " 'null_sign_f476': 2,\n",
       " 'null_sign_f3636': 1,\n",
       " 'null_sign_f1913': 2,\n",
       " 'null_sign_f1625': 2,\n",
       " 'null_sign_f3571': 1,\n",
       " 'null_sign_f499': 2,\n",
       " 'null_sign_f392': 1,\n",
       " 'null_sign_f1017': 2,\n",
       " 'null_sign_f5380': 3,\n",
       " 'null_sign_f4443': 2,\n",
       " 'null_sign_f1788': 1,\n",
       " 'null_sign_f4908': 1,\n",
       " 'null_sign_f3093': 1,\n",
       " 'null_sign_f2082': 1,\n",
       " 'null_sign_f5978': 1,\n",
       " 'null_sign_f1632': 1,\n",
       " 'null_sign_f2374': 2,\n",
       " 'null_sign_f699': 1,\n",
       " 'null_sign_f1587': 1,\n",
       " 'null_sign_f1672': 1,\n",
       " 'null_sign_f1074': 1,\n",
       " 'null_sign_f3393': 1,\n",
       " 'null_sign_f5056': 1,\n",
       " 'null_sign_f165': 3,\n",
       " 'null_sign_f4321': 1,\n",
       " 'null_sign_f3509': 1,\n",
       " 'null_sign_f3658': 1,\n",
       " 'null_sign_f2553': 2,\n",
       " 'null_sign_f3936': 1,\n",
       " 'null_sign_f4789': 1,\n",
       " 'null_sign_f234': 1,\n",
       " 'null_sign_f4220': 1,\n",
       " 'null_sign_f2497': 2,\n",
       " 'null_sign_f3483': 1,\n",
       " 'null_sign_f1815': 1,\n",
       " 'null_sign_f4071': 1,\n",
       " 'null_sign_f2570': 2,\n",
       " 'null_sign_f26': 2,\n",
       " 'null_sign_f4527': 1,\n",
       " 'null_sign_f1224': 3,\n",
       " 'null_sign_f3683': 1,\n",
       " 'null_sign_f2558': 1,\n",
       " 'null_sign_f1952': 1,\n",
       " 'null_sign_f422': 1,\n",
       " 'null_sign_f35': 1,\n",
       " 'null_sign_f4264': 1,\n",
       " 'null_sign_f2584': 1,\n",
       " 'null_sign_f6007': 1,\n",
       " 'null_sign_f4460': 1,\n",
       " 'null_sign_f56': 1,\n",
       " 'null_sign_f1982': 1,\n",
       " 'null_sign_f2841': 1,\n",
       " 'null_sign_f3557': 1,\n",
       " 'null_sign_f2509': 1,\n",
       " 'null_sign_f1682': 1,\n",
       " 'null_sign_f445': 1,\n",
       " 'null_sign_f1337': 1,\n",
       " 'null_sign_f832': 1,\n",
       " 'null_sign_f2450': 1,\n",
       " 'null_sign_f4363': 1,\n",
       " 'null_sign_f1408': 1,\n",
       " 'null_sign_f72': 1}"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Split-count importance per feature ('weight'); features never used in a split are omitted.\n",
    "model_33465.get_score(importance_type='weight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### tag=0 part (all rows except the last 3000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\ttrain-auc:0.643012\n",
      "[10]\ttrain-auc:0.683014\n",
      "[20]\ttrain-auc:0.699693\n",
      "[30]\ttrain-auc:0.709707\n",
      "[40]\ttrain-auc:0.718901\n",
      "[50]\ttrain-auc:0.725988\n",
      "[60]\ttrain-auc:0.732488\n",
      "[70]\ttrain-auc:0.738239\n",
      "[80]\ttrain-auc:0.742093\n",
      "[90]\ttrain-auc:0.746669\n",
      "[100]\ttrain-auc:0.749507\n",
      "[110]\ttrain-auc:0.753453\n",
      "[120]\ttrain-auc:0.756372\n",
      "[130]\ttrain-auc:0.759052\n",
      "[140]\ttrain-auc:0.761537\n",
      "[149]\ttrain-auc:0.764038\n"
     ]
    }
   ],
   "source": [
    "# tag=0 part: every row except the last 3000.\n",
    "# Dedicated names are used so the full-data `dtrain` is not overwritten by this cell.\n",
    "x_tag0, y_tag0 = x[:-3000], y[:-3000]\n",
    "dtrain_tag0 = xgb.DMatrix(x_tag0, y_tag0, feature_names=list(data.columns))\n",
    "\n",
    "# NOTE: 'early_stopping_rounds' is deliberately NOT part of `params`. It is a keyword argument\n",
    "# of xgb.train(), not a booster parameter, so as a dict entry it never triggered early stopping.\n",
    "params = {\n",
    "    'booster': 'gbtree',\n",
    "    'objective': 'binary:logistic',\n",
    "    # negative samples divided by positive samples, counted on this subset only\n",
    "    # (previously the ratio was taken from the full label array `y`)\n",
    "    'scale_pos_weight': float(len(y_tag0) - np.sum(y_tag0)) / float(np.sum(y_tag0)),\n",
    "    'eval_metric': 'auc',\n",
    "    'gamma': 0.1,\n",
    "    'max_depth': 3,\n",
    "    'lambda': 10,\n",
    "    'subsample': 0.9,\n",
    "    'colsample_bytree': 0.9,\n",
    "    'eta': 0.04,\n",
    "    'seed': 2018,\n",
    "    'nthread': 18,\n",
    "}\n",
    "\n",
    "# Only the training subset is monitored, so the printed AUC is a training metric.\n",
    "watchlist = [(dtrain_tag0, 'train')]\n",
    "model_30465 = xgb.train(\n",
    "    params,\n",
    "    dtrain_tag0,\n",
    "    num_boost_round=150,\n",
    "    evals=watchlist,\n",
    "    verbose_eval=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['model_30465']"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persist the booster trained on all rows except the last 3000.\n",
    "joblib.dump(value=model_30465, filename='model_30465')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### tag=1 part (the last 3000 rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\ttrain-auc:0.619081\n",
      "[10]\ttrain-auc:0.693724\n",
      "[20]\ttrain-auc:0.707035\n",
      "[30]\ttrain-auc:0.714302\n",
      "[40]\ttrain-auc:0.725864\n",
      "[50]\ttrain-auc:0.734763\n",
      "[60]\ttrain-auc:0.740762\n",
      "[70]\ttrain-auc:0.744774\n",
      "[80]\ttrain-auc:0.75014\n",
      "[90]\ttrain-auc:0.757322\n",
      "[100]\ttrain-auc:0.764599\n",
      "[110]\ttrain-auc:0.770274\n",
      "[120]\ttrain-auc:0.776341\n",
      "[130]\ttrain-auc:0.780004\n",
      "[140]\ttrain-auc:0.784639\n",
      "[150]\ttrain-auc:0.788107\n",
      "[160]\ttrain-auc:0.791566\n",
      "[170]\ttrain-auc:0.795649\n",
      "[179]\ttrain-auc:0.800997\n"
     ]
    }
   ],
   "source": [
    "# tag=1 part: the last 3000 rows only.\n",
    "# Dedicated names are used so the full-data `dtrain` is not overwritten by this cell.\n",
    "x_tag1, y_tag1 = x[-3000:], y[-3000:]\n",
    "dtrain_tag1 = xgb.DMatrix(x_tag1, y_tag1, feature_names=list(data.columns))\n",
    "\n",
    "# NOTE: 'early_stopping_rounds' is deliberately NOT part of `params`. It is a keyword argument\n",
    "# of xgb.train(), not a booster parameter, so as a dict entry it never triggered early stopping.\n",
    "params = {\n",
    "    'booster': 'gbtree',\n",
    "    'objective': 'binary:logistic',\n",
    "    # negative samples divided by positive samples, counted on this subset only\n",
    "    # (previously the ratio was taken from the full label array `y`)\n",
    "    'scale_pos_weight': float(len(y_tag1) - np.sum(y_tag1)) / float(np.sum(y_tag1)),\n",
    "    'eval_metric': 'auc',\n",
    "    'gamma': 0.1,\n",
    "    'max_depth': 2,\n",
    "    'lambda': 10,\n",
    "    'subsample': 0.9,\n",
    "    'colsample_bytree': 0.9,\n",
    "    'eta': 0.04,\n",
    "    'seed': 2018,\n",
    "    'nthread': 18,\n",
    "}\n",
    "\n",
    "# Only the training subset is monitored, so the printed AUC is a training metric.\n",
    "watchlist = [(dtrain_tag1, 'train')]\n",
    "model_3000 = xgb.train(\n",
    "    params,\n",
    "    dtrain_tag1,\n",
    "    num_boost_round=180,\n",
    "    evals=watchlist,\n",
    "    verbose_eval=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['model_3000']"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persist the booster trained on the last 3000 rows.\n",
    "joblib.dump(value=model_3000, filename='model_3000')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
